Goals

Using pre-processed data containing time stamped Vehicle Position Logs (VPLs), I want to create animated maps that show the following:

  1. The position of each bus in the city on 5 minute intervals
  2. The average prediction for derivation from the schedule (early or late) for all busses in each community, by hour
  3. The average speed for all busses passing through each community, by hour

NOTE: The animate commands take a very long time to run, so make sure you comment them out before running all code chunks.

Loading Libraries and Data

First, I load in the vehicle position logs along with libraries for aggregating and manipulating data, working with dates, and working with geospatial information. I also load in a shapefile of Calgary community boundaries from the City of Calgary Open Data Catalogue here.

library(tidyverse)
## ── Attaching packages ──────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: /usr/share/gdal/2.2
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.3-2
library(rgeos)
## rgeos version: 0.5-2, (SVN revision 621)
##  GEOS runtime version: 3.6.2-CAPI-1.10.2 
##  Linking to sp version: 1.3-2 
##  Polygon checking: TRUE
library(maptools)
## Checking rgeos availability: TRUE
library(gifski)
library(gganimate)
library(transformr)

vpl_15052019 <- readRDS("vpl_15052019.rds")
community_boundaries <- readOGR("geo_export_e135fc9f-0eec-429a-b94b-14836af732d4.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "/home/alex/Documents/calgary_transit_egs/geo_export_e135fc9f-0eec-429a-b94b-14836af732d4.shp", layer: "geo_export_e135fc9f-0eec-429a-b94b-14836af732d4"
## with 306 features
## It has 7 fields
community_bounds_df <- fortify(community_boundaries,
                               region = "name")

First Map: Vehicle Locations

For this map, I need to have a data frame with the time stamps synchronized so that I can plot them on the same frame. I do this by rounding each time stamp to the nearest 5 minutes, and then aggregating the vehicle positions by taking the mean of the vehicle positions over that 5 minute interval.

vpl_5_min_intervals <- vpl_15052019
vpl_5_min_intervals$vehicle_position_date_time <-
  round_date(vpl_5_min_intervals$vehicle_position_date_time,
             unit = "5 minutes")
vpl_5_min_intervals <- group_by(vpl_5_min_intervals,
                                vehicle_position_date_time,
                                vehicle_id) %>% 
  summarise(longitude = mean(longitude),
            latitude = mean(latitude)) %>% 
  arrange(vehicle_position_date_time, vehicle_id)

Now I generate the animated map. Note that the transition_states() argument only applies to the layers using the data passed in to ggplot(). Also note that coord_map() uses the mercator projection by default.

p <- ggplot(vpl_5_min_intervals,
            aes(x = longitude,
                y = latitude)) +
  geom_point(aes(group = vehicle_id),
             colour = "red") +
  labs(title = "Position of YYC Transit Vehicles",
       subtitle = "{closest_state}",
       caption = "Data: City of Calgary Open Data\nCreated by Alexander Ondrus") +
  geom_polygon(data = community_bounds_df,
               aes(x = long,
                   y = lat,
                   group = group),
               fill = NA,
               colour = "grey") +
  theme_void() +
  theme(plot.margin = margin(5,5,5,5)) +
  coord_map() +
  transition_states(vehicle_position_date_time,
                    transition_length = 4,
                    state_length = 1)
  
animate(p, nframes = 1440, duration = 60)

Second Map: Early or Late?

For the next map, I want to colour each community by the mean predicted deviation for all of the transit vehicle that cross through that community in a given 15-minute span.

vpl_deviation <- vpl_15052019 %>% 
  mutate(time_rounded_1h = round_date(vehicle_position_date_time,
                                        unit = "1 hour")) %>% 
  group_by(time_rounded_1h,
           community) %>% 
  summarise(med_deviation = median(predicted_deviation))

vpl_deviation$deviation <- cut_interval(vpl_deviation$med_deviation, 
                                        n = 7,
                                        dig.lab = 2,
                                        ordered_result = TRUE)

Note that vpl_deviation does not contain NA values for time-community combinations that are not present, they are simply not listed. I need the NA values to be present in order for them to be represented on the choropleth.

To make this possible, I will take the Cartesian product of the community_bounds_df with the unique time values before attaching the mean deviations with a left_join().

times <- vpl_deviation %>% 
  arrange(time_rounded_1h) %>% 
  select(time_rounded_1h) %>% 
  distinct()

community_bounds_w_times <- crossing(community_bounds_df,
                                     times) %>% 
  arrange(order, time_rounded_1h)

community_bounds_w_deviation <- left_join(community_bounds_w_times,
                                          vpl_deviation,
                                          by = c("id" = "community",
                                                 "time_rounded_1h" = "time_rounded_1h"))
## Warning: Column `id`/`community` joining character vector and factor, coercing
## into character vector

I am now in a position to map the data using geom_polygon with the fill mapped to the mean deviation.

map_2 <- ggplot(community_bounds_w_deviation,
                aes(x = long,
                    y = lat,
                    group = group)) +
  geom_polygon(aes(fill = deviation),
               colour = "white") +
  coord_map() +
  theme_void() +
  theme(plot.margin = margin(5,5,5,5)) +
  transition_time(time_rounded_1h) +
  scale_fill_brewer(type = "div",
                    palette = "Spectral") + 
  labs(title = "Will YYC Transit Vehicles be Early or Late?",
       subtitle = "Median predicted deviation from schedule for all vehicles\nin each community at times closest to {frame_time}",
       caption = "Data: City of Calgary Open Data Catalogue\nCreated by: Alexander Ondrus",
       fill = "Predicted Deviation")

animate(map_2, nframes = 25, duration = 60)

Third Map: Vehicle Speed

Similar to how the last map was constructed, I will first have to summarize the speed of the busses in each community for each hour and then I will attach that data to the polygon data frame.

Rather than use the raw speed for each community, I will first scale the speeds of all vehicles across all hours and then take the median scaled value within each community for each hour. Changes in this value will then represent deviations from what is “normal” for that community.

vpl_speed <- vpl_15052019 %>% 
  group_by(community) %>% 
  mutate(scaled_speed = (average_speed - mean(average_speed))/mean(average_speed)) %>%
  ungroup() %>% 
  mutate(time_rounded_1h = round_date(vehicle_position_date_time,
                                      unit = "1 hour")) %>% 
  group_by(time_rounded_1h,
           community) %>% 
  summarise(mean_speed_percent = mean(scaled_speed)) %>% 
  filter(mean_speed_percent <1)

vpl_speed$speed_bins <- cut_interval(vpl_speed$mean_speed_percent,
                                     n = 7,
                                     dig.lab = 2,
                                     ordered_result = TRUE)

I can perform a similar joining operation to what was done previously for the deviations choropleth.

times <- vpl_speed %>% 
  arrange(time_rounded_1h) %>% 
  select(time_rounded_1h) %>% 
  distinct()

community_bounds_w_times <- crossing(community_bounds_df,
                                     times) %>% 
  arrange(order, time_rounded_1h)

community_bounds_w_speed <- left_join(community_bounds_w_times,
                                          vpl_speed,
                                          by = c("id" = "community",
                                                 "time_rounded_1h" = "time_rounded_1h"))
## Warning: Column `id`/`community` joining character vector and factor, coercing
## into character vector

The mapping commands also are very similar to what was done for the previous map.

map_3 <- ggplot(community_bounds_w_speed,
                aes(x = long,
                    y = lat,
                    group = group)) +
  geom_polygon(aes(fill = speed_bins),
               colour = "grey") +
  coord_map() +
  theme_void() +
  theme(plot.margin = margin(5,5,5,5)) +
  transition_time(time_rounded_1h) +
  scale_fill_brewer(type = "div",
                    palette = "Spectral") + 
  labs(title = "When are YYC Vehicles Moving the Fastest\n(or Slowest) in Each Community?",
       subtitle = "Percent above or below the mean daily speed at {frame_time}.",
       caption = "Data: City of Calgary Open Data Catalogue\nCreated by: Alexander Ondrus",
       fill = "% as a decimal")

animate(map_3, nframes = 25, duration = 60)

Finally, for comparison, I create a facet wrapped version that displays each frame on its own slide. Unfortunately, facet_wrap appears to glitch when faceting by times, so I will convert it into an ordered factor first.

times_sorted <- sort(unique(community_bounds_w_speed$time_rounded_1h)) %>% 
  as.character()
community_bounds_w_speed$time_rounded_1h <-  as.character(
  community_bounds_w_speed$time_rounded_1h
) %>% 
  factor(
  levels = times_sorted,
  ordered = TRUE
)
map_3_facet <- ggplot(community_bounds_w_speed,
                aes(x = long,
                    y = lat,
                    group = group)) +
  geom_polygon(aes(fill = speed_bins),
               colour = "grey") +
  coord_map() +
  theme_void() +
  theme(plot.margin = margin(5,5,5,5)) +
  facet_wrap("time_rounded_1h", nrow = 5) +
  scale_fill_brewer(type = "div",
                    palette = "Spectral") + 
  labs(title = "When are YYC Vehicles Moving the Fastest\n(or Slowest) in Each Community?",
       subtitle = "Percent above or below the mean daily speed at each time.",
       caption = "Data: City of Calgary Open Data Catalogue\nCreated by: Alexander Ondrus",
       fill = "% as a decimal")

plot(map_3_facet)